################################################################################
##                             Data Analysis for:                             ##
##                          A Desire Named Streetcar                          ##
##          The Politics of Streetcar Infrastructure in Urban America         ##
##                                                                            ##
##       Thesis presented by Yanxi Fang to the Department of Government       ##
##                         Harvard College, March 2023                        ##
################################################################################

# load packages
library(tidyverse)
library(readxl)
library(stargazer)

# read in raw data: streetcars
streetcar <- read_csv("streetcars.csv")

################################################################################
##                             City-Level Analysis                            ##

# read in raw data: city-level
census_city_00 <- read_excel("SOC_Census/place00.xls")
census_city_90 <- read_excel("SOC_Census/place90.xls")
census_city_key <- read_excel("SOC_Census/placeid.xls")

# manipulating data to facilitate the merging of datasets
streetcar_city <- streetcar %>%
  mutate(city_name = ifelse(state == "CT", paste(city_name, "town"), 
                                           paste(city_name, "city"))) %>%
  mutate(city_name = ifelse(city_name == "Indianapolis city", 
                                           paste(city_name, "(balance)"), 
                                           city_name)) %>%
  mutate(city_name = ifelse(city_name == "Augusta city", 
                                           "Augusta-Richmond County (balance)", 
                                           city_name)) %>%
  mutate(city_name = ifelse(city_name == "Arlington city", 
                                           "Arlington CDP", city_name)) %>%
  mutate(city_name = ifelse(city_name == "Honolulu city", 
                                           "Honolulu CDP", city_name))

census_city_key <- census_city_key %>%
  mutate(city_name = cityname) %>%
  mutate(state = STUSAB)

# join census dataset with streetcar dataset
census_city_00 <- census_city_00 %>%
  left_join(census_city_key, by = "stplac")
census_city_90 <- census_city_90 %>%
  left_join(census_city_key, by = "stplac")

streetcar_city_00 <- streetcar_city %>%
  left_join(census_city_00, by = c("city_name", "state"))
streetcar_city_90 <- streetcar_city %>%
  left_join(census_city_90, by = c("city_name", "state"))

# manipulate data
streetcar_city_00 <- streetcar_city_00 %>%
  mutate(logpop = log(pop100)) %>%
  mutate(pctwhite = nhspwht/racebase) %>%
  mutate(pctblack = nhspblk/racebase) %>%
  mutate(pctother = nhspoth/racebase) %>%
  mutate(pcthisp = tothisp/racebase) %>%
  mutate(pctempl = employed/labforce) %>%
  mutate(pctnilf = nilabfor/pop100) %>%
  mutate(pctpov = povpop/povbase) %>%
  mutate(logmedinc = log(medfminc)) %>%
  mutate(pcthsgrad = hsgrad/edubase) %>%
  mutate(pctcollgrad = collgrad/edubase) %>%
  mutate(pctimm = immgrnt/immbase)

streetcar_city_90 <- streetcar_city_90 %>%
  mutate(logpop = log(pop100)) %>%
  mutate(pctwhite = nhspwht/racebase) %>%
  mutate(pctblack = nhspblk/racebase) %>%
  mutate(pctother = nhspoth/racebase) %>%
  mutate(pcthisp = tothisp/racebase) %>%
  mutate(pctempl = employed/labforce) %>%
  mutate(pctnilf = nilabfor/pop100) %>%
  mutate(pctpov = povpop/povbase) %>%
  mutate(logmedinc = log(medfminc)) %>%
  mutate(pcthsgrad = hsgrad/edubase) %>%
  mutate(pctcollgrad = collgrad/edubase) %>%
  mutate(pctimm = immgrnt/immbase)

# analysis
city_00_linear <- lm(built ~ logpop + pctblack + pctother + pcthisp + pctempl + pctempl +
                     pctnilf + pctpov + logmedinc + pcthsgrad + pctcollgrad + pctimm, 
                     data = streetcar_city_00)
city_00_logit <- glm(built ~ logpop + pctblack + pctother + pcthisp + pctempl + pctempl +
                     pctnilf + pctpov + logmedinc + pcthsgrad + pctcollgrad + pctimm,
                     data = streetcar_city_00, family = "binomial")

city_90_linear <- lm(built ~ logpop + pctblack + pctother + pcthisp + pctempl + pctempl +
                     pctnilf + pctpov + logmedinc + pcthsgrad + pctcollgrad + pctimm, 
                     data = streetcar_city_90)
city_90_logit <- glm(built ~ logpop + pctblack + pctother + pcthisp + pctempl + pctempl +
                     pctnilf + pctpov + logmedinc + pcthsgrad + pctcollgrad + pctimm,
                     data = streetcar_city_90, family = "binomial")

city_00_simple <- lm(built ~ logpop, data = streetcar_city_00)
city_90_simple <- lm(built ~ logpop, data = streetcar_city_90)

city_00_simple_lg <- glm(built ~ logpop, data = streetcar_city_00, family = "binomial")
city_90_simple_lg <- glm(built ~ logpop, data = streetcar_city_90, family = "binomial")

################################################################################
##                      Metropolitan-Area Level Analysis                      ##

# read in raw data: MSA-level
census_msa_00 <- read_excel("SOC_Census/msa00.xls")
census_msa_90 <- read_excel("SOC_Census/msa90.xls")
census_msa_key <- read_excel("SOC_Census/msanecmaid.xls")

# manipulating data to facilitate the merging of datasets
streetcar_msa <- streetcar_city

census_msa_key <- census_msa_key %>%
  mutate(msanecma = MSANECMA)

streetcar_msa <- streetcar_msa %>%
  left_join(census_city_key, by = c("city_name", "state")) %>%
  left_join(census_msa_key, by = "msanecma")

# join census dataset with streetcar dataset
streetcar_msa_00 <- streetcar_msa %>%
  left_join(census_msa_00, by = "msanecma")
streetcar_msa_90 <- streetcar_msa %>%
  left_join(census_msa_90, by = "msanecma")

# manipulate data
streetcar_msa_00 <- streetcar_msa_00 %>%
  mutate(logpop = log(pop100)) %>%
  mutate(pctwhite = nhspwht/racebase) %>%
  mutate(pctblack = nhspblk/racebase) %>%
  mutate(pctother = nhspoth/racebase) %>%
  mutate(pcthisp = tothisp/racebase) %>%
  mutate(pctempl = employed/labforce) %>%
  mutate(pctnilf = nilabfor/pop100) %>%
  mutate(pctpov = povpop/povbase) %>%
  mutate(logmedinc = log(medfminc_msa)) %>%
  mutate(pcthsgrad = hsgrad/edubase) %>%
  mutate(pctcollgrad = collgrad/edubase) %>%
  mutate(pctimm = immgrnt/immbase)

streetcar_msa_90 <- streetcar_msa_90 %>%
  mutate(logpop = log(pop90)) %>%
  mutate(pctwhite = nhspwht/racebase) %>%
  mutate(pctblack = nhspblk/racebase) %>%
  mutate(pctother = nhspoth/racebase) %>%
  mutate(pcthisp = tothisp/racebase) %>%
  mutate(pctempl = employed/labforce) %>%
  mutate(pctnilf = nilabfor/pop90) %>%
  mutate(pctpov = povpop/povbase) %>%
  mutate(logmedinc = log(medfminc)) %>%
  mutate(pcthsgrad = hsgrad/edubase) %>%
  mutate(pctcollgrad = collgrad/edubase) %>%
  mutate(pctimm = immgrnt/immbase)

# analysis
msa_00_linear <- lm(built ~ logpop + pctblack + pctother + pcthisp + pctempl + pctempl +
                    pctnilf + pctpov + logmedinc + pcthsgrad + pctcollgrad + pctimm, 
                    data = streetcar_msa_00)
msa_00_logit <- glm(built ~ logpop + pctblack + pctother + pcthisp + pctempl + pctempl +
                    pctnilf + pctpov + logmedinc + pcthsgrad + pctcollgrad + pctimm,
                    data = streetcar_msa_00, family = "binomial")

msa_90_linear <- lm(built ~ logpop + pctblack + pctother + pcthisp + pctempl + pctempl +
                    pctnilf + pctpov + logmedinc + pcthsgrad + pctcollgrad + pctimm, 
                    data = streetcar_msa_90)
msa_90_logit <- glm(built ~ logpop + pctblack + pctother + pcthisp + pctempl + pctempl +
                    pctnilf + pctpov + logmedinc + pcthsgrad + pctcollgrad + pctimm,
                    data = streetcar_msa_90, family = "binomial")

msa_00_simple <- lm(built ~ logpop, data = streetcar_msa_00)
msa_90_simple <- lm(built ~ logpop, data = streetcar_msa_90)

msa_00_simple_lg <- glm(built ~ logpop, data = streetcar_msa_00, family = "binomial")
msa_90_simple_lg <- glm(built ~ logpop, data = streetcar_msa_90, family = "binomial")

################################################################################
##                              Printing Results                              ##

stargazer(city_90_simple, city_90_linear, msa_90_simple, msa_90_linear,
          city_00_simple, city_00_linear, msa_00_simple, msa_00_linear,
          type = "text")

stargazer(city_90_simple_lg, city_90_logit, msa_90_simple_lg, msa_90_logit,
          city_00_simple_lg, city_00_logit, msa_00_simple_lg, msa_00_logit,
          type = "text")
